#!/usr/bin/env bash
#
# Launch train_multi_gpu_launch.py with torchrun, one worker process per GPU.
#
# Usage:
#   ./<this script> [ARGS...]
#
# Arguments:
#   ARGS  Optional extra arguments, forwarded unchanged to
#         train_multi_gpu_launch.py.
#
# Environment:
#   GPUS  Comma-separated GPU ids to expose to the job through
#         CUDA_VISIBLE_DEVICES (default: 5,6).
#
# With no GPUS override and no arguments this runs exactly:
#   CUDA_VISIBLE_DEVICES=5,6 torchrun --nproc_per_node=2 train_multi_gpu_launch.py
#
# Written in POSIX sh syntax so it also works when invoked as `sh <script>`.
set -eu

gpus=${GPUS:-5,6}

# Derive the worker count from the GPU list so the two can never drift apart:
# N comma-separated ids contain N-1 commas.
commas_only=$(printf '%s' "$gpus" | tr -cd ',')
nproc_per_node=$(( ${#commas_only} + 1 ))

# The exit status of torchrun becomes the exit status of this script.
CUDA_VISIBLE_DEVICES="$gpus" torchrun \
  --nproc_per_node="$nproc_per_node" \
  train_multi_gpu_launch.py "$@"
